TFGfinal

Librerias

library(pls) 

Attaching package: 'pls'
The following object is masked from 'package:stats':

    loadings
library(readxl)
library(dplyr)

Attaching package: 'dplyr'
The following objects are masked from 'package:stats':

    filter, lag
The following objects are masked from 'package:base':

    intersect, setdiff, setequal, union
library(ggplot2)
library(tidyverse)
── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
✔ forcats   1.0.0     ✔ stringr   1.5.1
✔ lubridate 1.9.3     ✔ tibble    3.2.1
✔ purrr     1.0.2     ✔ tidyr     1.3.1
✔ readr     2.1.5     
── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag()    masks stats::lag()
ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(dplyr)#usar filter
library(sf)
Linking to GEOS 3.11.0, GDAL 3.5.3, PROJ 9.1.0; sf_use_s2() is TRUE
library(spdep)
Loading required package: spData
To access larger datasets in this package, install the spDataLarge
package with: `install.packages('spDataLarge',
repos='https://nowosad.github.io/drat/', type='source')`
library(jsonlite)

Attaching package: 'jsonlite'

The following object is masked from 'package:purrr':

    flatten
library(leaflet)
library(ggpubr)
library(dplyr)
library(quarto)
library(mgcv)
Loading required package: nlme

Attaching package: 'nlme'

The following object is masked from 'package:dplyr':

    collapse

This is mgcv 1.9-3. For overview type 'help("mgcv-package")'.
library(car)
Loading required package: carData

Attaching package: 'car'

The following object is masked from 'package:purrr':

    some

The following object is masked from 'package:dplyr':

    recode
library(performance)
library(betaselectr)
library(ggpubr)
library(dygraphs)
library(xts)
Loading required package: zoo

Attaching package: 'zoo'

The following objects are masked from 'package:base':

    as.Date, as.Date.numeric


######################### Warning from 'xts' package ##########################
#                                                                             #
# The dplyr lag() function breaks how base R's lag() function is supposed to  #
# work, which breaks lag(my_xts). Calls to lag(my_xts) that you type or       #
# source() into this session won't work correctly.                            #
#                                                                             #
# Use stats::lag() to make sure you're not using dplyr::lag(), or you can add #
# conflictRules('dplyr', exclude = 'lag') to your .Rprofile to stop           #
# dplyr from breaking base R's lag() function.                                #
#                                                                             #
# Code in packages is not affected. It's protected by R's namespace mechanism #
# Set `options(xts.warn_dplyr_breaks_lag = FALSE)` to suppress this warning.  #
#                                                                             #
###############################################################################

Attaching package: 'xts'

The following object is masked from 'package:leaflet':

    addLegend

The following objects are masked from 'package:dplyr':

    first, last
library(vtable) 
Loading required package: kableExtra

Attaching package: 'kableExtra'

The following object is masked from 'package:dplyr':

    group_rows
library(glmmTMB)

Datos y Limpieza

qqnorm pero qqbeta. x cuantiles de la beta de un continente y en y cuntiles de la muestra y en otro color los cuantiles de la normal

A continuación, se procede a la carga de las bases de datos correspondientes al continente africano. Se han utilizado dos data frames separados, ya que durante el proceso de descarga algunos países no fueron seleccionados inicialmente. Por este motivo, se optó por mantener ambas bases de datos por separado para conservar la coherencia y facilitar su posterior manejo.

IDHAFRICA1<-read_xlsx("IDHAFRICA.xlsx")

IDHAFRICAFALTANTES1<-read_xlsx("AFRICAF.xlsx")
# Añadir la columna "CONTINENTE"
IDHAFRICA1$CONTINENTE <- "AFRICA"
IDHAFRICAFALTANTES1$CONTINENTE <- "AFRICA"

Cargo los datos de IDH de Europa:

IDHEUROPA1<-read_xlsx("IDHEUROPA.xlsx")
IDHEUROPA1$CONTINENTE <- "EUROPA"

Borro columnas de información irrelevante y ordeno alfabéticamente las dos bases de datos.

IDHAFRICAFALTANTES1 <- IDHAFRICAFALTANTES1 %>% #borro columnas
  select(-3, -4, -5,-6,-10)
IDHAFRICA1 <- IDHAFRICA1 %>% 
  select(-3, -4, -5,-6,-10) #borro columnas
IDHAFRICAFALTANTES1 <- IDHAFRICAFALTANTES1 %>% 
   arrange(IDHAFRICAFALTANTES1[[1]])
IDHAFRICA1 <- IDHAFRICA1 %>%
  arrange(IDHAFRICA1[[1]]) #ordeno alf abeticamente
IDHEUROPA1 <- IDHEUROPA1 %>%
  select(-3, -4, -5,-6,-10)
IDHEUROPA1<- IDHEUROPA1%>%
  arrange(IDHEUROPA1[[1]])

Borro bases de datos del 1990-1999 debido a que muchas variables utilizadas más adelante solo disponian de datos del 2000 al 2020.

IDHEUROPA <- IDHEUROPA1 %>%
  filter(!(year >= 1990 & year <= 1999))
IDHAFRICA <- IDHAFRICA1 %>%
  filter(!(year >= 1990 & year <= 1999))
IDHAFRICAFALTANTES <- IDHAFRICAFALTANTES1 %>%
  filter(!(year >= 1990 & year <= 1999))

Cargo la base de datos obtenidas del centro de datos de la ONU y realizamos

CARACTEUROPA <- read_xlsx("CARACTEUROPA.xlsx")
CARACTAFRICA <- read_xlsx("CARACTAFRICA.xlsx")
CARACTEUROPA<-  CARACTEUROPA %>%
  select(-3, -4, -5,-6,-10)
CARACTEUROPA <- CARACTEUROPA%>%
  arrange(CARACTEUROPA[[1]])
CARACTAFRICA <- CARACTAFRICA %>%
  select(-3, -4, -5,-6,-10)
CARACTAFRICA <- CARACTAFRICA%>%
  arrange(CARACTAFRICA[[1]])
CARACTAFRICA$CONTINENTE <- "AFRICA"
CARACTEUROPA$CONTINENTE <- "EUROPA"
GASTOSALUD<- read_xlsx("datosgastosalud.xlsx")
GASTOSALUD <- GASTOSALUD[,-c(3:44)]
AccesoElectricidad <- read_xlsx("Acceso electricidad.xlsx")
AGUAPOTABLE <-  read_xlsx("AGUA POTABLE.xlsx")
AGUAPOTABLE <-AGUAPOTABLE[,-c(3:44)]
CPI<-read_csv("cpi.csv")
Rows: 182 Columns: 31
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr  (3): Country / Territory, ISO3, Region
dbl (28): 2022, 2021, 2020, 2019, 2018, 2017, 2016, 2015, 2014, 2013, 2012, ...

ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
years_to_normalize <- as.character(1995:2011)

# Multiplicar por 10 los valores de esos años
CPI[years_to_normalize] <- CPI[years_to_normalize] * 10
CPI <-CPI [,-3]
names(CPI)[1] <- "Country Name"
names(CPI)[2] <- "ISO"
CARACTEUROPA <- read_xlsx("CARACTEUROPA.xlsx")
CARACTAFRICA <- read_xlsx("CARACTAFRICA.xlsx")
CARACTEUROPA<-  CARACTEUROPA %>%
  select(-3, -4, -5,-6,-10)
CARACTEUROPA <- CARACTEUROPA%>%
  arrange(CARACTEUROPA[[1]])
CARACTAFRICA <- CARACTAFRICA %>%
  select(-3, -4, -5,-6,-10)
CARACTAFRICA <- CARACTAFRICA%>%
  arrange(CARACTAFRICA[[1]])
CARACTAFRICA$CONTINENTE <- "AFRICA"
CARACTEUROPA$CONTINENTE <- "EUROPA"
IDHAFRICA<- IDHAFRICA %>%
  pivot_wider(
    names_from = indicator,   # Los valores de la columna 'indicator' serán las nuevas columnas
    values_from = value       # Los valores de la columna 'value' serán los que se asignen a las nuevas columnas
  )

# Ver el resultado
head(IDHAFRICA)
# A tibble: 6 × 10
  countryIsoCode country year  CONTINENTE `Expected Years of Schooling (years)`
  <chr>          <chr>   <chr> <chr>                                      <dbl>
1 AGO            Angola  2000  AFRICA                                      4.82
2 AGO            Angola  2001  AFRICA                                      5.25
3 AGO            Angola  2002  AFRICA                                      5.68
4 AGO            Angola  2003  AFRICA                                      6.10
5 AGO            Angola  2004  AFRICA                                      6.53
6 AGO            Angola  2005  AFRICA                                      6.96
# ℹ 5 more variables: `Gross National Income Per Capita (2017 PPP$)` <dbl>,
#   `Human Development Index (value)` <dbl>, `HDI Rank` <dbl>,
#   `Life Expectancy at Birth (years)` <dbl>,
#   `Mean Years of Schooling (years)` <dbl>
#sigo sin juntarlo porque ocurren problemas
IDHAFRICAFALTANTES<- IDHAFRICAFALTANTES%>%
  pivot_wider(
    names_from = indicator,   # Los valores de la columna 'indicator' serán las nuevas columnas
    values_from = value       # Los valores de la columna 'value' serán los que se asignen a las nuevas columnas
  )

# Ver el resultado
head(IDHAFRICAFALTANTES)
# A tibble: 6 × 10
  countryIsoCode country       year  CONTINENTE Expected Years of Schooling (y…¹
  <chr>          <chr>         <chr> <chr>                                 <dbl>
1 CIV            Côte d'Ivoire 2000  AFRICA                                 6.49
2 CIV            Côte d'Ivoire 2001  AFRICA                                 6.62
3 CIV            Côte d'Ivoire 2002  AFRICA                                 6.74
4 CIV            Côte d'Ivoire 2003  AFRICA                                 6.87
5 CIV            Côte d'Ivoire 2004  AFRICA                                 7.00
6 CIV            Côte d'Ivoire 2005  AFRICA                                 7.13
# ℹ abbreviated name: ¹​`Expected Years of Schooling (years)`
# ℹ 5 more variables: `Gross National Income Per Capita (2017 PPP$)` <dbl>,
#   `Human Development Index (value)` <dbl>, `HDI Rank` <dbl>,
#   `Life Expectancy at Birth (years)` <dbl>,
#   `Mean Years of Schooling (years)` <dbl>
# Guardar el resultado en un nuevo archivo
write.csv(IDHAFRICAFALTANTES, "IDHAFRICAFALTANTES_pivoted.csv", row.names = FALSE)
IDHEUROPA<- IDHEUROPA %>%
  pivot_wider(
    names_from = indicator,   # Los valores de la columna 'indicator' serán las nuevas columnas
    values_from = value       # Los valores de la columna 'value' serán los que se asignen a las nuevas columnas
  )

# Ver el resultado
head(IDHEUROPA)
# A tibble: 6 × 10
  countryIsoCode country year  CONTINENTE `Expected Years of Schooling (years)`
  <chr>          <chr>   <chr> <chr>                                      <dbl>
1 ALB            Albania 2000  EUROPA                                      10.7
2 ALB            Albania 2001  EUROPA                                      10.9
3 ALB            Albania 2002  EUROPA                                      11.0
4 ALB            Albania 2003  EUROPA                                      11.4
5 ALB            Albania 2004  EUROPA                                      11.5
6 ALB            Albania 2005  EUROPA                                      12.2
# ℹ 5 more variables: `Gross National Income Per Capita (2017 PPP$)` <dbl>,
#   `Human Development Index (value)` <dbl>, `HDI Rank` <dbl>,
#   `Life Expectancy at Birth (years)` <dbl>,
#   `Mean Years of Schooling (years)` <dbl>
write.csv(IDHEUROPA, "IDHEUROPE_pivoted.csv", row.names = FALSE)
CARACTEUROPA<- CARACTEUROPA %>%
  pivot_wider(
    names_from = indicator,   # Los valores de la columna 'indicator' serán las nuevas columnas
    values_from = value       # Los valores de la columna 'value' serán los que se asignen a las nuevas columnas
  )

# Ver el resultado
head(CARACTEUROPA)
# A tibble: 6 × 16
  countryIsoCode country year  CONTINENTE Carbon dioxide emissions per capita …¹
  <chr>          <chr>   <chr> <chr>                                       <dbl>
1 ALB            Albania 1990  EUROPA                                      1.68 
2 ALB            Albania 1991  EUROPA                                      1.30 
3 ALB            Albania 1992  EUROPA                                      0.762
4 ALB            Albania 1993  EUROPA                                      0.708
5 ALB            Albania 1994  EUROPA                                      0.584
6 ALB            Albania 1995  EUROPA                                      0.636
# ℹ abbreviated name:
#   ¹​`Carbon dioxide emissions per capita (production) (tonnes)`
# ℹ 11 more variables: `Coefficient of human inequality` <dbl>,
#   `Gender Inequality Index (value)` <dbl>, `Inequality in eduation` <dbl>,
#   `Inequality in income` <dbl>, `Inequality in life expectancy` <dbl>,
#   `Labour force participation rate, female (% ages 15 and older)` <dbl>,
#   `Labour force participation rate, male (% ages 15 and older)` <dbl>, …
write.csv(CARACTEUROPA, "CARACTEUROPA_pivoted.csv", row.names = FALSE)
CARACTAFRICA<- CARACTAFRICA %>%
  pivot_wider(
    names_from = indicator,   # Los valores de la columna 'indicator' serán las nuevas columnas
    values_from = value       # Los valores de la columna 'value' serán los que se asignen a las nuevas columnas
  )

# Ver el resultado
head(CARACTAFRICA)
# A tibble: 6 × 16
  countryIsoCode country year  CONTINENTE Carbon dioxide emissions per capita …¹
  <chr>          <chr>   <chr> <chr>                                       <dbl>
1 AGO            Angola  1990  AFRICA                                      0.43 
2 AGO            Angola  1991  AFRICA                                      0.414
3 AGO            Angola  1992  AFRICA                                      0.409
4 AGO            Angola  1993  AFRICA                                      0.441
5 AGO            Angola  1994  AFRICA                                      0.843
6 AGO            Angola  1995  AFRICA                                      0.907
# ℹ abbreviated name:
#   ¹​`Carbon dioxide emissions per capita (production) (tonnes)`
# ℹ 11 more variables: `Coefficient of human inequality` <dbl>,
#   `Gender Inequality Index (value)` <dbl>, `Inequality in eduation` <dbl>,
#   `Inequality in income` <dbl>, `Inequality in life expectancy` <dbl>,
#   `Labour force participation rate, female (% ages 15 and older)` <dbl>,
#   `Labour force participation rate, male (% ages 15 and older)` <dbl>, …
write.csv(CARACTAFRICA, "CARACTAFRICA_pivoted.csv", row.names = FALSE)
IDH <- rbind(IDHAFRICA, IDHEUROPA,IDHAFRICAFALTANTES)
CARACT <- rbind(CARACTEUROPA, CARACTAFRICA)
IDH <- IDH %>%
  arrange(IDH[[1]]) 
CARACT <- CARACT %>%
  arrange(CARACT[[1]]) 
unique_isocodes <- n_distinct(IDH$countryIsoCode)
unique_isocodes #hay103 paises
[1] 103
unique_isocodes1<- n_distinct(CARACT$countryIsoCode)
unique_isocodes1#hay103 paises con lo cual concuerdaa
[1] 103
#Cambiamos los nombres columnas 
colnames(IDH)[7] <- "HDI" 
colnames(IDH)[1] <- "ISO"
colnames(IDH)[5] <- "EYS" #Expected years of schooling
colnames(IDH)[6] <- "GNP"#GROS NATIONAL INCOME
colnames(IDH)[9] <- "LEB" #life expectancy at birth
colnames(IDH)[10] <- "MYS" #mean years of schooling
colnames(CARACT)[1] <- "ISO"
colnames(CARACT)[5] <- "CO2"
colnames(CARACT)[6] <- "CHI" #Coefficient of human inequality"
colnames(CARACT)[7] <- "GII" #Gender Inequality Index
colnames(CARACT)[8] <- "IE" #Inequality in eduation
colnames(CARACT)[9] <- "II" #Inequality in income
colnames(CARACT)[10] <- "ILE" #Inequality in life expectancy
colnames(CARACT)[11] <- "LFPF" #Labour force participation rate, female(relacionado conel GII) describe el empowerment
colnames(CARACT)[12] <- "LFPM" #Labour force participation rate, male(relacionado conel GII)
colnames(CARACT)[13] <- "MFC" #Material footprint per capita (tonnes)
colnames(CARACT)[14] <- "MMR" #Maternal Mortality Ratio (relacionado conel GII)
colnames(CARACT)[15] <- "SSPF" #Share of seats in parliament, female(relacionado conel GII)
colnames(CARACT)[16] <- "SSPM" #Share of seats in parliament, male (relacionado conel GII)
paisesmapa <- st_read("mapabueno1.geojson")
Reading layer `mapabueno1' from data source 
  `/Users/belenroderorodriguez/Documents/TFG/datos/mapabueno1.geojson' 
  using driver `GeoJSON'
Simple feature collection with 256 features and 9 fields
Geometry type: MULTIPOLYGON
Dimension:     XY
Bounding box:  xmin: -180 ymin: -58.49861 xmax: 180 ymax: 83.6236
Geodetic CRS:  WGS 84
#borramos monaco y andorra 
IDH_filtrado <- IDH[!IDH$ISO %in% c("AND", "MCO"), ]
head(IDH_filtrado)
# A tibble: 6 × 10
  ISO   country year  CONTINENTE   EYS   GNP   HDI `HDI Rank`   LEB   MYS
  <chr> <chr>   <chr> <chr>      <dbl> <dbl> <dbl>      <dbl> <dbl> <dbl>
1 AGO   Angola  2000  AFRICA      4.82 3913. 0.38          NA  46.0  3.43
2 AGO   Angola  2001  AFRICA      5.25 3950. 0.39          NA  46.6  3.46
3 AGO   Angola  2002  AFRICA      5.68 4659. 0.406         NA  47.4  3.50
4 AGO   Angola  2003  AFRICA      6.10 4682. 0.424         NA  49.6  3.55
5 AGO   Angola  2004  AFRICA      6.53 4926. 0.437         NA  50.6  3.59
6 AGO   Angola  2005  AFRICA      6.96 5385. 0.451         NA  51.6  3.63
IDH_filtrado <- IDH_filtrado%>%
  arrange(IDH_filtrado[[1]], IDH_filtrado[[3]])
CARACT <- CARACT[!CARACT$ISO %in% c("AND", "MCO"), ]
ISOCODE1 <- unique(CARACT$ISO)
CARACT <- CARACT %>%
  arrange(CARACT[[1]], CARACT [[3]])
GASTOSALUD <- GASTOSALUD %>%
  arrange(GASTOSALUD[[1]])
unique_isocodes <- n_distinct(IDH$ISO)
unique_isocodes 
[1] 103
unique_isocodes2<- n_distinct(GASTOSALUD$`Country Code`)
unique_isocodes2
[1] 266
GASTOSALUD <- GASTOSALUD %>%
  semi_join(IDH, by = c("Country Code" = "ISO"))
GASTOSALUD <- GASTOSALUD %>%
  pivot_longer(
    cols = 3:ncol(.),           # Desde la 3ra columna hasta el final (los años)
    names_to = "year",          # Nueva columna con los años
    values_to = "gasto_salud"   # Nueva columna con los valores del indicador
  ) %>%
  mutate(year = as.numeric(year))  # Convierte los años a numéricos

GASTOSALUD <- GASTOSALUD %>%
  left_join(CARACT %>% select(ISO, CONTINENTE) %>% distinct(), by = c("Country Code" = "ISO"))
GASTOSALUD <- GASTOSALUD %>%
  rename(ISO = `Country Code`)
CARACT$year <- as.numeric(CARACT$year)
GASTOSALUD$year <- as.numeric(GASTOSALUD$year)
names(CARACT)
 [1] "ISO"        "country"    "year"       "CONTINENTE" "CO2"       
 [6] "CHI"        "GII"        "IE"         "II"         "ILE"       
[11] "LFPF"       "LFPM"       "MFC"        "MMR"        "SSPF"      
[16] "SSPM"      
names(GASTOSALUD)
[1] "Country Name" "ISO"          "year"         "gasto_salud"  "CONTINENTE"  
CARACT <- CARACT %>%
   left_join(GASTOSALUD, by = c("ISO", "year"))
CARACT<- CARACT %>%
  select(-CONTINENTE.y) %>%  # borra la que no quieres
  rename(CONTINENTE = CONTINENTE.x)  # renombra la que te quedas
CARACT <- CARACT %>% #borro columnas
  select(-17)
AccesoElectricidad <- AccesoElectricidad %>%
  arrange(AccesoElectricidad[[1]])
unique_isocodes5 <- n_distinct(IDH$ISO)
unique_isocodes5 
[1] 103
unique_isocodes6<- n_distinct(AccesoElectricidad$`Country Code`)
unique_isocodes6
[1] 266
AccesoElectricidad<- AccesoElectricidad %>%
  semi_join(IDH, by = c("Country Code" = "ISO"))
AccesoElectricidad <- AccesoElectricidad %>% select(c(-3,-4))
AccesoElectricidad <- AccesoElectricidad %>%
  pivot_longer(
    cols = 3:ncol(.),           # Desde la 3ra columna hasta el final (los años)
    names_to = "year",          # Nueva columna con los años
    values_to = "AccesoElectricidad"   # Nueva columna con los valores del indicador
  ) %>%
  mutate(year = as.numeric(year))  # Convierte los años a numéricos

AccesoElectricidad <- AccesoElectricidad %>%
  left_join(CARACT %>% select(ISO, CONTINENTE) %>% distinct(), by = c("Country Code" = "ISO"))
AccesoElectricidad<-AccesoElectricidad %>%
  rename(ISO = `Country Code`)
CARACT$year <- as.numeric(CARACT$year)
AccesoElectricidadOyear <- as.numeric(AccesoElectricidad$year)
names(CARACT)
 [1] "ISO"         "country"     "year"        "CONTINENTE"  "CO2"        
 [6] "CHI"         "GII"         "IE"          "II"          "ILE"        
[11] "LFPF"        "LFPM"        "MFC"         "MMR"         "SSPF"       
[16] "SSPM"        "gasto_salud"
names(AccesoElectricidad)
[1] "Country Name"       "ISO"                "year"              
[4] "AccesoElectricidad" "CONTINENTE"        
AccesoElectricidad <- AccesoElectricidad %>%
  filter(!(year >= 1960 & year <= 1999))
CARACT <- CARACT %>%
   left_join(AccesoElectricidad, by = c("ISO", "year"))
CARACT<- CARACT %>%
  select(-CONTINENTE.y) %>%  # borra la que no quieres
  rename(CONTINENTE = CONTINENTE.x)  # renombra la que te quedas
CARACT <- CARACT %>% #borro columnas
  select(-18)
ISOCODE <- unique(IDH_filtrado$ISO)
# Imprimir la lista de códigos de país combinados
print(ISOCODE)#hay103
  [1] "AGO" "ALB" "ARM" "AUT" "AZE" "BDI" "BEL" "BEN" "BFA" "BGR" "BIH" "BLR"
 [13] "BWA" "CAF" "CHE" "CIV" "CMR" "COD" "COG" "COM" "CPV" "CYP" "CZE" "DEU"
 [25] "DJI" "DNK" "DZA" "EGY" "ERI" "ESP" "EST" "ETH" "FIN" "FRA" "GAB" "GBR"
 [37] "GEO" "GHA" "GIN" "GMB" "GNB" "GNQ" "GRC" "HRV" "HUN" "IRL" "ISL" "ITA"
 [49] "KAZ" "KEN" "LBR" "LBY" "LIE" "LSO" "LTU" "LUX" "LVA" "MAR" "MDA" "MDG"
 [61] "MKD" "MLI" "MLT" "MNE" "MOZ" "MRT" "MUS" "MWI" "NAM" "NER" "NGA" "NLD"
 [73] "NOR" "POL" "PRT" "ROU" "RUS" "RWA" "SDN" "SEN" "SLE" "SMR" "SOM" "SRB"
 [85] "SSD" "STP" "SVK" "SVN" "SWE" "SWZ" "SYC" "TCD" "TGO" "TUN" "TUR" "TZA"
 [97] "UGA" "UKR" "ZAF" "ZMB" "ZWE"
isocodes_no_en_paises <- setdiff(ISOCODE, paisesmapa$iso3)
paises_filtrados <- paisesmapa[paisesmapa$iso3 %in% IDH_filtrado$ISO, ]
paises_filtrados <- paises_filtrados  %>% arrange(paises_filtrados[2])
print(ISOCODE)
  [1] "AGO" "ALB" "ARM" "AUT" "AZE" "BDI" "BEL" "BEN" "BFA" "BGR" "BIH" "BLR"
 [13] "BWA" "CAF" "CHE" "CIV" "CMR" "COD" "COG" "COM" "CPV" "CYP" "CZE" "DEU"
 [25] "DJI" "DNK" "DZA" "EGY" "ERI" "ESP" "EST" "ETH" "FIN" "FRA" "GAB" "GBR"
 [37] "GEO" "GHA" "GIN" "GMB" "GNB" "GNQ" "GRC" "HRV" "HUN" "IRL" "ISL" "ITA"
 [49] "KAZ" "KEN" "LBR" "LBY" "LIE" "LSO" "LTU" "LUX" "LVA" "MAR" "MDA" "MDG"
 [61] "MKD" "MLI" "MLT" "MNE" "MOZ" "MRT" "MUS" "MWI" "NAM" "NER" "NGA" "NLD"
 [73] "NOR" "POL" "PRT" "ROU" "RUS" "RWA" "SDN" "SEN" "SLE" "SMR" "SOM" "SRB"
 [85] "SSD" "STP" "SVK" "SVN" "SWE" "SWZ" "SYC" "TCD" "TGO" "TUN" "TUR" "TZA"
 [97] "UGA" "UKR" "ZAF" "ZMB" "ZWE"
# borro de tu base de datos paises que no necesito 
head(paises_filtrados)
Simple feature collection with 6 features and 9 fields
Geometry type: MULTIPOLYGON
Dimension:     XY
Bounding box:  xmin: 9.53357 ymin: -18.01639 xmax: 51.67701 ymax: 49.01875
Geodetic CRS:  WGS 84
                                               geo_point_2d iso3       status
1 { "lon": 17.544675786366358, "lat": -12.295285224744552 }  AGO Member State
2  { "lon": 20.068384605918776, "lat": 41.142284823416894 }  ALB Member State
3  { "lon": 44.946823543969643, "lat": 40.286619877420861 }  ARM Member State
4   { "lon": 14.14019348879525, "lat": 47.592902606915196 }  AUT Member State
5   { "lon": 48.819879230357841, "lat": 40.29691565933846 }  AZE Member State
6 { "lon": 29.887145470708866, "lat": -3.3561748083588876 }  BDI Member State
  color_code       name continent          region iso_3166_1_alpha_2_codes
1        AGO     Angola    Africa   Middle Africa                       AO
2        ALB    Albania    Europe Southern Europe                       AL
3        ARM    Armenia      Asia    Western Asia                       AM
4        AUT    Austria    Europe  Western Europe                       AT
5        AZE Azerbaijan      Asia    Western Asia                       AZ
6        BDI    Burundi    Africa  Eastern Africa                       BI
  french_short                       geometry
1       Angola MULTIPOLYGON (((23.98621 -1...
2      Albanie MULTIPOLYGON (((20.07142 42...
3      Arménie MULTIPOLYGON (((46.54038 38...
4     Autriche MULTIPOLYGON (((16.94618 48...
5  Azerbaïdjan MULTIPOLYGON (((46.17825 38...
6      Burundi MULTIPOLYGON (((30.57333 -2...
AGUAPOTABLE <- AGUAPOTABLE%>%
  arrange(AGUAPOTABLE[[1]])
AGUAPOTABLE <- AGUAPOTABLE %>%
  semi_join(IDH, by = c("Country Code" = "ISO"))
AGUAPOTABLE <- AGUAPOTABLE %>%
  pivot_longer(
    cols = 3:ncol(.),           # Desde la 3ra columna hasta el final (los años)
    names_to = "year",          # Nueva columna con los años
    values_to = "AGUAPOTABLE"   # Nueva columna con los valores del indicador
  ) %>%
  mutate(year = as.numeric(year))  # Convierte los años a numéricos

AGUAPOTABLE <- AGUAPOTABLE %>%
  left_join(CARACT %>% select(ISO, CONTINENTE) %>% distinct(), by = c("Country Code" = "ISO"))
AGUAPOTABLE <- AGUAPOTABLE %>%
  rename(ISO = `Country Code`)
CARACT$year <- as.numeric(CARACT$year)
AGUAPOTABLE$year <- as.numeric(AGUAPOTABLE$year)
names(CARACT)
 [1] "ISO"                "country"            "year"              
 [4] "CONTINENTE"         "CO2"                "CHI"               
 [7] "GII"                "IE"                 "II"                
[10] "ILE"                "LFPF"               "LFPM"              
[13] "MFC"                "MMR"                "SSPF"              
[16] "SSPM"               "gasto_salud"        "AccesoElectricidad"
names(AGUAPOTABLE)
[1] "Country Name" "ISO"          "year"         "AGUAPOTABLE"  "CONTINENTE"  
CARACT <- CARACT %>%
   left_join(AGUAPOTABLE, by = c("ISO", "year"))
CARACT<- CARACT %>%
  select(-CONTINENTE.y) %>%  # borra la que no quieres
  rename(CONTINENTE = CONTINENTE.x)  # renombra la que te quedas
CARACT <- CARACT %>% #borro columnas
  select(-19)
# Ordena los datos por la primera columna (Country Name)
CPI <- CPI %>%
  arrange(CPI[[1]])

# Realiza un semi_join con el dataframe IDH usando la columna 'ISO' en CPI y 'ISO' en IDH
CPI <- CPI %>%
  semi_join(IDH, by = c("ISO" = "ISO"))

# Transformar los datos de CPI a formato largo (long format), de modo que cada año se convierta en una fila
CPI <- CPI %>%
  pivot_longer(
    cols = 3:ncol(.),          # Desde la 3ra columna hasta el final (los años de CPI)
    names_to = "year",         # Nueva columna con los años
    values_to = "CPI"          # Nueva columna con los valores del CPI
  ) %>%
  mutate(year = as.numeric(year))  # Convierte los años a numéricos

# Si tienes un dataframe CARACT con la columna ISO y CONTINENTE, lo unimos
CPI <- CPI %>%
  left_join(CARACT %>% select(ISO, CONTINENTE) %>% distinct(), by = c("ISO" = "ISO"))

# Renombramos la columna 'Country Code' a 'ISO' si es necesario (en este caso no es necesario)
# CPI <- CPI %>%
#   rename(ISO = `Country Code`)   # Solo haz esto si la columna 'ISO' en CPI necesita ser renombrada

# Aseguramos que 'year' en CARACT y CPI esté en formato numérico
CARACT$year <- as.numeric(CARACT$year)
CPI$year <- as.numeric(CPI$year)
CARACT <- CARACT %>%
   left_join(CPI, by = c("ISO", "year"))
CARACT<- CARACT %>%
  select(-CONTINENTE.y) %>%  # borra la que no quieres
  rename(CONTINENTE = CONTINENTE.x)  # renombra la que te quedas
CARACT <- CARACT %>% #borro columnas
  select(-20)

Para podemos utilzar una distribución beta debemos comprobar que no hay valores 0 o 1.

sum(IDH$HDI == 1)
[1] NA
sum(IDH$HDI == 0)
[1] NA

Analisis Exploratorio

media_por_continente <- IDH %>%
  group_by(CONTINENTE, year) %>%
  summarise(media_idh = mean(HDI, na.rm = TRUE), .groups = "drop")

# Convertir a dos arrays separados (uno por continente)
europa <- media_por_continente %>%
  filter(CONTINENTE == "Europa") %>%
  pull(media_idh)

africa <- media_por_continente %>%
  filter(CONTINENTE == "África") %>%
  pull(media_idh)
CARACT <- CARACT %>%
  filter(!(year >= 1990 & year <= 1999))
# Combinar los data frames por la columna común 'País'
combined_data <- merge(IDH, CARACT, by = c('country', 'year','CONTINENTE','ISO'))
# Crear el gráfico de puntos
africa_data <- combined_data[combined_data$CONTINENTE == "AFRICA", ]
europa_data <- combined_data[combined_data$CONTINENTE == "EUROPA", ]
africa_subset <- africa_data %>%
  select(7,11,13,17:26)
europa_subset <- europa_data %>%
  select(7,11,13,17:26)
sumtable(
  africa_subset ,
  add.median = FALSE,     # opcional: si no quieres medianas
  title = "",             # sin título          # salida en consola
  summ = c("min(x)","pctile(x)[25]","pctile(x)[50]","mean(x)", "sd(x)","pctile(x)[75]",  "max(x)")  # puedes personalizar las estadísticas
)
Warning in attr(x, "align"): 'xfun::attr()' is deprecated.
Use 'xfun::attr2()' instead.
See help("Deprecated")
Variable Min Pctile[25] Pctile[50] Mean Sd Pctile[75] Max
HDI 0.26 0.43 0.5 0.52 0.12 0.59 0.81
CO2 0.019 0.14 0.36 1.1 1.9 1 11
GII 0.24 0.52 0.57 0.56 0.098 0.63 0.81
LFPF 6.7 39 55 54 19 70 93
LFPM 39 64 71 71 10 78 99
MFC 0.12 2.8 3.8 4.9 3.2 6.2 26
MMR 3.1 233 443 479 332 627 1687
SSPF 0.01 9.7 15 18 11 24 58
SSPM 42 76 85 82 11 90 100
gasto_salud 0.18 5.5 11 48 84 47 611
AccesoElectricidad 0.8 18 41 44 30 65 100
AGUAPOTABLE 19 51 64 65 18 78 100
CPI 8 22 29 31 11 37 70
sumtable(
  europa_subset,
  add.median = FALSE,     # opcional: si no quieres medianas
  title = "",             # sin título          # salida en consola
  summ = c("min(x)","pctile(x)[25]","pctile(x)[50]","mean(x)", "sd(x)","pctile(x)[75]",  "max(x)")  # puedes personalizar las estadísticas
)
Warning in attr(x, "align"): 'xfun::attr()' is deprecated.
Use 'xfun::attr2()' instead.
See help("Deprecated")
Variable Min Pctile[25] Pctile[50] Mean Sd Pctile[75] Max
HDI 0.64 0.79 0.86 0.84 0.073 0.9 0.97
CO2 0.82 4.4 6.2 6.9 3.6 8.8 26
GII 0.009 0.095 0.16 0.17 0.1 0.23 0.54
LFPF 23 47 53 53 9.8 58 86
LFPM 41 63 67 67 6.5 71 88
MFC 3.3 13 17 20 9.3 26 61
MMR 1.1 5.4 8.1 12 11 14 68
SSPF 3.1 14 21 23 11 31 48
SSPM 52 69 79 77 11 86 97
gasto_salud 6 247 851 1590 1687 2718 7871
AccesoElectricidad 88 100 100 100 1.4 100 100
AGUAPOTABLE 73 97 100 98 3.6 100 100
CPI 15 38 53 56 22 75 100
ggplot(africa_data, aes(x = as.factor(year), y = HDI)) +
  # Violin pastel
  geom_violin(fill = "#D0E6F6", color = NA, alpha = 0.9, trim = FALSE) +
  # Boxplot más ancho y con línea más gruesa
  geom_boxplot(
    width = 0.4,                     # más ancho (0.4 en lugar de 0.2)
    fill = NA, 
    color = "#5B8DB8", 
    outlier.shape = NA,
    linewidth = 0.8                  # línea más gruesa
  ) +
  # Puntos
  geom_jitter(width = 0.15, size = 1.3, color = "#2C5F8A", alpha = 0.6) +
  # Mediana en rojo pastel
  stat_summary(
    fun = median, geom = "point", shape = 21, size = 2.5,
    fill = "#F08080", color = "black", stroke = 0.6
  ) +
  labs(
    x = "Año", y = "Índice de Desarrollo Humano"
  ) +
  theme_minimal() +
  theme(
    axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1),
    plot.title = element_text(face = "bold", size = 14),
    axis.title = element_text(size = 12)
  )
Warning: Removed 55 rows containing non-finite outside the scale range
(`stat_ydensity()`).
Warning: Removed 55 rows containing non-finite outside the scale range
(`stat_boxplot()`).
Warning: Removed 55 rows containing non-finite outside the scale range
(`stat_summary()`).
Warning: Removed 55 rows containing missing values or values outside the scale range
(`geom_point()`).

ggplot(europa_data, aes(x = as.factor(year), y = HDI)) +
  # Violin pastel
  geom_violin(fill = "#D0E6F6", color = NA, alpha = 0.9, trim = FALSE) +
  # Boxplot más ancho y con línea más gruesa
  geom_boxplot(
    width = 0.4,                     # más ancho (0.4 en lugar de 0.2)
    fill = NA, 
    color = "#5B8DB8", 
    outlier.shape = NA,
    linewidth = 0.8                  # línea más gruesa
  ) +
  # Puntos
  geom_jitter(width = 0.15, size = 1.3, color = "#2C5F8A", alpha = 0.6) +
  # Mediana en rojo pastel
  stat_summary(
    fun = median, geom = "point", shape = 21, size = 2.5,
    fill = "#F08080", color = "black", stroke = 0.6
  ) +
  labs(
    x = "Año", y = "Índice de Desarrollo Humano"
  ) +
  theme_minimal() +
  theme(
    axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1),
    plot.title = element_text(face = "bold", size = 14),
    axis.title = element_text(size = 12)
  )
Warning: Removed 3 rows containing non-finite outside the scale range
(`stat_ydensity()`).
Warning: Removed 3 rows containing non-finite outside the scale range
(`stat_boxplot()`).
Warning: Removed 3 rows containing non-finite outside the scale range
(`stat_summary()`).
Warning: Removed 3 rows containing missing values or values outside the scale range
(`geom_point()`).

ggplot(europa_data, aes(x = as.factor(year), y = HDI)) +
  # Violin pastel
  geom_violin(fill = "#D0E6F6", color = NA, alpha = 0.9, trim = FALSE) +
  # Boxplot más ancho y con línea más gruesa
  geom_boxplot(
    width = 0.4,                     # más ancho (0.4 en lugar de 0.2)
    fill = NA, 
    color = "#5B8DB8", 
    outlier.shape = NA,
    linewidth = 0.8                  # línea más gruesa
  ) +
  # Puntos
  geom_jitter(width = 0.15, size = 1.3, color = "#2C5F8A", alpha = 0.6) +
  # Mediana en rojo pastel
  stat_summary(
    fun = median, geom = "point", shape = 21, size = 2.5,
    fill = "#F08080", color = "black", stroke = 0.6
  ) +
  labs(
    x = "Año", y = "Índice de Desarrollo Humano"
  ) +
  theme_minimal() +
  theme(
    axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1),
    plot.title = element_text(face = "bold", size = 14),
    axis.title = element_text(size = 12)
  )
Warning: Removed 3 rows containing non-finite outside the scale range
(`stat_ydensity()`).
Warning: Removed 3 rows containing non-finite outside the scale range
(`stat_boxplot()`).
Warning: Removed 3 rows containing non-finite outside the scale range
(`stat_summary()`).
Warning: Removed 3 rows containing missing values or values outside the scale range
(`geom_point()`).

hdi_quantiles <- africa_data %>%
  group_by(year) %>%
  summarise(
    Q1 = quantile(HDI, 0.25, na.rm = TRUE),
    Q2 = quantile(HDI, 0.50, na.rm = TRUE),
    Q3 = quantile(HDI, 0.75, na.rm = TRUE)
  ) %>%
  mutate(date = as.Date(paste0(year, "-01-01")))

# Paso 2: convertir a xts
hdi_xts <- xts(hdi_quantiles[, c("Q1", "Q2", "Q3")], order.by = hdi_quantiles$date)

# Paso 3: gráfico interactivo con estilo
dygraph(hdi_xts, main = "Distribución del HDI en África (cuantiles)") %>%
  dySeries("Q1", label = "Q1 (25%)", color = "#1b9e77") %>%
  dySeries("Q2", label = "Mediana", color = "#984ea3") %>%
  dySeries("Q3", label = "Q3 (75%)", color = "#377eb8") %>%
  dyOptions(strokeWidth = 2.5) %>%
  dyHighlight(
    highlightCircleSize = 5,
    highlightSeriesBackgroundAlpha = 0.2,
    highlightSeriesOpts = list(strokeWidth = 4)
  ) %>%
  dyRangeSelector(height = 30, strokeColor = "#666", fillColor = "#d0ebff")
hdi_quantiles1 <- europa_data %>%
  group_by(year) %>%
  summarise(
    Q1 = quantile(HDI, 0.25, na.rm = TRUE),
    Q2 = quantile(HDI, 0.50, na.rm = TRUE),
    Q3 = quantile(HDI, 0.75, na.rm = TRUE)
  ) %>%
  mutate(date = as.Date(paste0(year, "-01-01")))

# Paso 2: convertir a xts
hdi_xts <- xts(hdi_quantiles1[, c("Q1", "Q2", "Q3")], order.by = hdi_quantiles1$date)

# Paso 3: gráfico interactivo con estilo
dygraph(hdi_xts, main = "Distribución del HDI en Europa (cuantiles)") %>%
  dySeries("Q1", label = "Q1 (25%)", color = "#1b9e77") %>%
  dySeries("Q2", label = "Mediana", color = "#984ea3") %>%
  dySeries("Q3", label = "Q3 (75%)", color = "#377eb8") %>%
  dyOptions(strokeWidth = 2.5) %>%
  dyHighlight(
    highlightCircleSize = 5,
    highlightSeriesBackgroundAlpha = 0.2,
    highlightSeriesOpts = list(strokeWidth = 4)
  ) %>%
  dyRangeSelector(height = 30, strokeColor = "#666", fillColor = "#d0ebff")
media_por_continente2 <- IDH %>%
  group_by(CONTINENTE, year) %>%
  summarise(media_LEB = mean(LEB, na.rm = TRUE), .groups = "drop")

# Convertir a dos arrays separados (uno por continente)
europa2<- media_por_continente2 %>%
  filter(CONTINENTE == "Europa") %>%
  pull(media_LEB)

africa2 <- media_por_continente2 %>%
  filter(CONTINENTE == "África") %>%
  pull(media_LEB)

graficoLEBE<- ggplot(media_por_continente2 %>% filter(CONTINENTE == "EUROPA"), 
                         aes(x = year, y = media_LEB, group = CONTINENTE, color = CONTINENTE)) +
  geom_line() +
  theme_minimal() +theme(axis.text.x = element_text(angle = 45, hjust = 1))+
  labs(title = "Europa", x = "Año", y = "LEB") +
  theme(legend.position = "none")

graficoLEBA <-ggplot(media_por_continente2 %>% filter(CONTINENTE == "AFRICA"), 
                         aes(x = year, y = media_LEB, group = CONTINENTE, color = CONTINENTE)) +
  geom_line() +
  theme_minimal() +theme(axis.text.x = element_text(angle = 45, hjust = 1))+
  labs(title = "AFRICA", x = "Año", y = "LEB") +
  theme(legend.position = "none")
hdi_quantiles1 <- europa_data %>%
  group_by(year) %>%
  summarise(
    Q1 = quantile(LEB, 0.25, na.rm = TRUE),
    Q2 = quantile(LEB, 0.50, na.rm = TRUE),
    Q3 = quantile(LEB, 0.75, na.rm = TRUE)
  ) %>%
  mutate(date = as.Date(paste0(year, "-01-01")))

# Paso 2: convertir a xts
hdi_xts <- xts(hdi_quantiles1[, c("Q1", "Q2", "Q3")], order.by = hdi_quantiles1$date)

# Paso 3: gráfico interactivo con estilo
dygraph(hdi_xts, main = "Distribución de la esperanza de vida en Europa (cuantiles)") %>%
  dySeries("Q1", label = "Q1 (25%)", color = "#1b9e77") %>%
  dySeries("Q2", label = "Mediana", color = "#984ea3") %>%
  dySeries("Q3", label = "Q3 (75%)", color = "#377eb8") %>%
  dyOptions(strokeWidth = 2.5) %>%
  dyHighlight(
    highlightCircleSize = 5,
    highlightSeriesBackgroundAlpha = 0.2,
    highlightSeriesOpts = list(strokeWidth = 4)
  ) %>%
  dyRangeSelector(height = 30, strokeColor = "#666", fillColor = "#d0ebff")
hdi_quantiles1 <- europa_data %>%
  group_by(year) %>%
  summarise(
    Q1 = quantile(GNP, 0.25, na.rm = TRUE),
    Q2 = quantile(GNP, 0.50, na.rm = TRUE),
    Q3 = quantile(GNP, 0.75, na.rm = TRUE)
  ) %>%
  mutate(date = as.Date(paste0(year, "-01-01")))

# Paso 2: convertir a xts
hdi_xts <- xts(hdi_quantiles1[, c("Q1", "Q2", "Q3")], order.by = hdi_quantiles1$date)

# Paso 3: gráfico interactivo con estilo
dygraph(hdi_xts, main = "Distribución del ingreso nacional bruto en Europa (cuantiles)") %>%
  dySeries("Q1", label = "Q1 (25%)", color = "#1b9e77") %>%
  dySeries("Q2", label = "Mediana", color = "#984ea3") %>%
  dySeries("Q3", label = "Q3 (75%)", color = "#377eb8") %>%
  dyOptions(strokeWidth = 2.5) %>%
  dyHighlight(
    highlightCircleSize = 5,
    highlightSeriesBackgroundAlpha = 0.2,
    highlightSeriesOpts = list(strokeWidth = 4)
  ) %>%
  dyRangeSelector(height = 30, strokeColor = "#666", fillColor = "#d0ebff")
media_por_continente1 <- IDH %>%
  group_by(CONTINENTE, year) %>%
  summarise(media_GNP = mean(GNP, na.rm = TRUE), .groups = "drop")

# Convertir a dos arrays separados (uno por continente)
europa1<- media_por_continente1 %>%
  filter(CONTINENTE == "Europa") %>%
  pull(media_GNP)

africa1 <- media_por_continente1 %>%
  filter(CONTINENTE == "África") %>%
  pull(media_GNP)

graficoGNPE <-ggplot(media_por_continente1 %>% filter(CONTINENTE == "EUROPA"), 
                         aes(x = year, y = media_GNP, group = CONTINENTE, color = CONTINENTE)) +
  geom_line() +
  theme_minimal() +theme(axis.text.x = element_text(angle = 45, hjust = 1))+
  labs(title = "Europa", x = "Año", y = "GNP") +
  theme(legend.position = "none")

graficoGNPA <-ggplot(media_por_continente1 %>% filter(CONTINENTE == "AFRICA"), 
                         aes(x = year, y = media_GNP, group = CONTINENTE, color = CONTINENTE)) +
  geom_line() +
  theme_minimal() +theme(axis.text.x = element_text(angle = 45, hjust = 1))+
  labs(title = "AFRICA", x = "Año", y = "GNP") +
  theme(legend.position = "none")
graficoGNPE 

graficoGNPA 

europa_data$CHI <- NULL
europa_data$IE <- NULL
europa_data$ILE <- NULL
europa_data$II <- NULL
africa_data$CHI <- NULL
africa_data$II<- NULL
africa_data$ILE <- NULL
africa_data$IE <- NULL
africa_data$'HDI Rank' <- NULL
europa_data$'HDI Rank' <- NULL
europa_data <- na.omit(europa_data)
africa_data <- na.omit(africa_data)
library(corrplot)
corrplot 0.95 loaded

Attaching package: 'corrplot'
The following object is masked from 'package:pls':

    corrplot
cor_spearman_europa <- cor(europa_data[, 5:ncol(europa_data)], 
                           method = "pearson", 
                           use = "pairwise.complete.obs")
corrplot(cor_spearman_europa, method = "color", type = "upper", 
         tl.cex = 0.7, title = "Correlaciones de Spearman en Europa", 
         addCoef.col = "black", number.cex = 0.7, mar = c(0, 0, 2, 0))

cor_spearman <- cor(africa_data[, 5:ncol(africa_data)], 
                           method = "spearman", 
                           use = "pairwise.complete.obs")
corrplot(cor_spearman, method = "color", type = "upper", 
         tl.cex = 0.7, title = "Correlaciones de Spearman en África", 
         addCoef.col = "black", number.cex = 0.7, mar = c(0, 0, 2, 0))

MFCE<-ggplot(europa_data%>% mutate(year=as.numeric(year)), aes(x = MFC, y = HDI, color = year)) +
  geom_point(size = 2) +
  labs(x = "MFC", y = "IDH", color = "Año") +
  scale_color_gradient2(low="blue",midpoint = 2010,mid ="gray80" , high="red")+
  theme_bw()
MFCA<-ggplot(africa_data%>% mutate(year=as.numeric(year)), aes(x = MFC, y = HDI, color = year)) +
  geom_point(size = 2) +
  labs(x = "MFC", y = "IDH", color = "Año") +
  scale_color_gradient2(low="blue",midpoint = 2010,mid ="gray80" , high="red")+
  theme_bw()

MFCE

MFCA

GSE<-ggplot(europa_data%>% mutate(year=as.numeric(year)), aes(x = gasto_salud, y = HDI, color = year)) +
  geom_point(size = 2) +
  labs(x = "Gasto en Salud", y = "IDH", color = "Año") +
  scale_color_gradient2(low="blue",midpoint = 2010,mid ="gray80" , high="red")+
  theme_bw()
GSA<-ggplot(africa_data%>% mutate(year=as.numeric(year)), aes(x = gasto_salud, y = HDI, color = year)) +
  geom_point(size = 2) +
  labs(x = "Gasto en Salud", y = "IDH", color = "Año") +
  scale_color_gradient2(low="blue",midpoint = 2010,mid ="gray80" , high="red")+
  theme_bw()
GSE

GSA

### Cambia los gráficos a esto
GSEAlirio<-ggplot(europa_data%>% mutate(year=as.numeric(year)), aes(x = gasto_salud, y = HDI, color = year)) +
  geom_point(size = 2) +
  labs(x = "Gasto en Salud", y = "IDH", color = "Año") +
  scale_color_gradient2(low="blue",midpoint = 2010,mid ="gray80" , high="red")+
  theme_minimal()
GSEAlirio

GSEA<-ggplot(europa_data%>% mutate(year=as.numeric(year)), aes(x = gasto_salud, y = HDI, color = year)) +
  geom_point(size = 2) +
  labs(x = "Gasto en Salud", y = "IDH", color = "Año") +
  scale_color_gradient2(low="blue",midpoint = 2010,mid ="gray80" , high="red")+
  theme_bw()
GSEAlirio

GSAL<-ggplot(africa_data%>% mutate(year=as.numeric(year)), aes(x = log(gasto_salud), y = HDI, color = year)) +
  geom_point(size = 2) +
  geom_smooth(method = "lm", se = FALSE, color = "black")+
  labs(x = "Gasto en salud", y = "IDH", color = "Año") +
  scale_color_gradient2(low="blue",midpoint = 2010,mid ="gray80" , high="red")+
  theme_bw()
GSAL
`geom_smooth()` using formula = 'y ~ x'

LMAGS <- lm(HDI ~ log(gasto_salud),
                data =africa_data)
summary(LMAGS)

Call:
lm(formula = HDI ~ log(gasto_salud), data = africa_data)

Residuals:
      Min        1Q    Median        3Q       Max 
-0.160518 -0.031738  0.001597  0.036008  0.187612 

Coefficients:
                 Estimate Std. Error t value Pr(>|t|)    
(Intercept)      0.338185   0.004970   68.05   <2e-16 ***
log(gasto_salud) 0.065402   0.001584   41.30   <2e-16 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 0.05513 on 675 degrees of freedom
Multiple R-squared:  0.7165,    Adjusted R-squared:  0.716 
F-statistic:  1706 on 1 and 675 DF,  p-value: < 2.2e-16
plot(LMAGS)

residuos_dfGSA <- data.frame(
  residuos_est = rstandard(LMAGS)
)
QQPLOTA<- ggplot(residuos_dfGSA, aes(sample = residuos_est)) +
  stat_qq(color = "#0072B2", size = 2, alpha = 0.7) +          # puntos
  stat_qq_line(color = "#D55E00", linewidth = 1.2, linetype = "dashed") +  # línea de referencia
  labs(title = "QQ Plot de los residuos estandarizados",
       x = "Cuantiles teóricos",
       y = "Cuantiles de los residuos") +
  theme_minimal(base_size = 14) +
  theme(
    plot.title = element_text(hjust = 0.5, face = "bold"),
    panel.grid.major = element_line(color = "gray90")
  )
QQPLOTA

residuosGSA <- data.frame(
  fitted = fitted(LMAGS),
  resid = resid(LMAGS)
)

ResidualsvsfittedLMAGS <- ggplot(residuosGSA, aes(x = fitted, y = resid)) +
  geom_point(color = "#0072B2", alpha = 0.6, size = 2) +         # puntos
  geom_smooth(method = "loess", se = FALSE, color = "#D55E00", linewidth = 1.2) +  # curva de tendencia
  geom_hline(yintercept = 0, linetype = "dashed", color = "gray40") +  # línea base
  labs(title = "Residuals vs Fitted Values",
       x = "Fitted values",
       y = "Residuals") +
  theme_minimal(base_size = 14) +
  theme(
    plot.title = element_text(hjust = 0.5, face = "bold"),
    panel.grid.major = element_line(color = "gray90")
  )
ResidualsvsfittedLMAGS
`geom_smooth()` using formula = 'y ~ x'

library(lmtest)
bptest(LMAGS)

    studentized Breusch-Pagan test

data:  LMAGS
BP = 0.82498, df = 1, p-value = 0.3637
GSEL<-ggplot(europa_data%>% mutate(year=as.numeric(year)), aes(x = log(gasto_salud), y = HDI, color = year)) +
  geom_point(size = 2) +
  geom_smooth(method = "lm", se = FALSE, color = "black")+
  labs(x = "Gasto en salud", y = "IDH", color = "Año") +
  scale_color_gradient2(low="blue",midpoint = 2010,mid ="gray80" , high="red")+
  theme_bw()

GSEL
`geom_smooth()` using formula = 'y ~ x'

LMEGS <- lm(HDI ~ log(gasto_salud),
                data =europa_data)
summary(LMEGS)

Call:
lm(formula = HDI ~ log(gasto_salud), data = europa_data)

Residuals:
      Min        1Q    Median        3Q       Max 
-0.096565 -0.013376  0.001773  0.017511  0.059315 

Coefficients:
                  Estimate Std. Error t value Pr(>|t|)    
(Intercept)      0.5485868  0.0038796  141.40   <2e-16 ***
log(gasto_salud) 0.0447092  0.0005713   78.26   <2e-16 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 0.02609 on 900 degrees of freedom
Multiple R-squared:  0.8719,    Adjusted R-squared:  0.8717 
F-statistic:  6124 on 1 and 900 DF,  p-value: < 2.2e-16
plot(LMEGS)

bptest(LMEGS)

    studentized Breusch-Pagan test

data:  LMEGS
BP = 37.641, df = 1, p-value = 8.502e-10
residuos_dfGSE <- data.frame(
  residuos_est = rstandard(LMEGS)
)
QQPLOT<- ggplot(residuos_dfGSE, aes(sample = residuos_est)) +
  stat_qq(color = "#0072B2", size = 2, alpha = 0.7) +          # puntos
  stat_qq_line(color = "#D55E00", linewidth = 1.2, linetype = "dashed") +  # línea de referencia
  labs(title = "QQ Plot de los residuos estandarizados",
       x = "Cuantiles teóricos",
       y = "Cuantiles de los residuos") +
  theme_minimal(base_size = 14) +
  theme(
    plot.title = element_text(hjust = 0.5, face = "bold"),
    panel.grid.major = element_line(color = "gray90")
  )
residuosGSE <- data.frame(
  fitted = fitted(LMEGS),
  resid = resid(LMEGS)
)

ResidualsvsfittedLMEGS <- ggplot(residuosGSE, aes(x = fitted, y = resid)) +
  geom_point(color = "#0072B2", alpha = 0.6, size = 2) +         # puntos
  geom_smooth(method = "loess", se = FALSE, color = "#D55E00", linewidth = 1.2) +  # curva de tendencia
  geom_hline(yintercept = 0, linetype = "dashed", color = "gray40") +  # línea base
  labs(title = "Residuals vs Fitted Values",
       x = "Fitted values",
       y = "Residuals") +
  theme_minimal(base_size = 14) +
  theme(
    plot.title = element_text(hjust = 0.5, face = "bold"),
    panel.grid.major = element_line(color = "gray90")
  )
MMRA<-ggplot(africa_data%>% mutate(year=as.numeric(year)), aes(x = MMR, y = HDI, color = year)) +
  geom_point(size = 2) +
  labs(x = "MMR", y = "IDH", color = "Año") +
  scale_color_gradient2(low="blue",midpoint = 2010,mid ="gray80" , high="red")+
  theme_bw()
MMRE<-ggplot(europa_data%>% mutate(year=as.numeric(year)), aes(x = MMR, y = HDI, color = year)) +
  geom_point(size = 2) +
  labs(x = "MMR", y = "IDH", color = "Año") +
  scale_color_gradient2(low="blue",midpoint = 2010,mid ="gray80" , high="red")+
  theme_bw()
MMRA

MMRE

GIIA<-ggplot(africa_data%>% mutate(year=as.numeric(year)), aes(x = GII, y = HDI, color = year)) +
  geom_point(size = 2) +
  labs(x = "GII", y = "IDH", color = "Año") +
  scale_color_gradient2(low="blue",midpoint = 2010,mid ="gray80" , high="red")+
  theme_bw()
GIIE<-ggplot(europa_data%>% mutate(year=as.numeric(year)), aes(x = GII, y = HDI, color = year)) +
  geom_point(size = 2) +
  labs(x = "GII", y = "IDH", color = "Año") +
  scale_color_gradient2(low="blue",midpoint = 2010,mid ="gray80" , high="red")+
  theme_bw()
GIIA

GIIE

LMGIIA <-lm(HDI ~ GII,data =africa_data)
plot(LMGIIA)

summary(LMGIIA)

Call:
lm(formula = HDI ~ GII, data = africa_data)

Residuals:
      Min        1Q    Median        3Q       Max 
-0.211161 -0.044161  0.000891  0.051845  0.162640 

Coefficients:
            Estimate Std. Error t value Pr(>|t|)    
(Intercept)  0.98933    0.01852   53.43   <2e-16 ***
GII         -0.82495    0.03243  -25.44   <2e-16 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 0.07398 on 675 degrees of freedom
Multiple R-squared:  0.4895,    Adjusted R-squared:  0.4887 
F-statistic: 647.2 on 1 and 675 DF,  p-value: < 2.2e-16
APA<-ggplot(africa_data%>% mutate(year=as.numeric(year)), aes(x = AGUAPOTABLE, y = HDI, color = year)) +
  geom_point(size = 2) +
  labs(x = "Agua potable", y = "IDH", color = "Año") +
  scale_color_gradient2(low="blue",midpoint = 2010,mid ="gray80" , high="red")+
  theme_bw()
APE<-ggplot(europa_data%>% mutate(year=as.numeric(year)), aes(x = AGUAPOTABLE, y = HDI, color = year)) +
  geom_point(size = 2) +
  labs(x = "Agua potable", y = "IDH", color = "Año") +
  scale_color_gradient2(low="blue",midpoint = 2010,mid ="gray80" , high="red")+
  theme_bw()
APA

APE

CPIE<-ggplot(europa_data%>% mutate(year=as.numeric(year)), aes(x = CPI, y = HDI, color = year)) +
  geom_point(size = 2) +
  labs(x = "CPI", y = "IDH", color = "Año") +
  scale_color_gradient2(low="blue",midpoint = 2010,mid ="gray80" , high="red")+
  theme_bw()
CPIE

SSPFE<-ggplot(europa_data%>% mutate(year=as.numeric(year)), aes(x = SSPF, y = HDI, color = year)) +
  geom_point(size = 2) +
  labs(x = "SSPF", y = "IDH", color = "Año") +
  scale_color_gradient2(low="blue",midpoint = 2010,mid ="gray80" , high="red")+
  theme_bw()
SSPFE

LFPFA<-ggplot(africa_data%>% mutate(year=as.numeric(year)), aes(x = LFPF, y = HDI, color = year)) +
  geom_point(size = 2) +
  labs(x = "LFPF", y = "IDH", color = "Año") +
  scale_color_gradient2(low="blue",midpoint = 2010,mid ="gray80" , high="red")+
  theme_bw()
AEA<-ggplot(africa_data%>% mutate(year=as.numeric(year)), aes(x = AccesoElectricidad, y = HDI, color = year)) +
  geom_point(size = 2) +
  labs(x = "Acceso electricidad", y = "IDH", color = "Año") +
  scale_color_gradient2(low="blue",midpoint = 2010,mid ="gray80" , high="red")+
  theme_bw()
AEA

CO2A<-ggplot(africa_data%>% mutate(year=as.numeric(year)), aes(x = CO2, y = HDI, color = year)) +
  geom_point(size = 2) +
  labs(x = "CO2", y = "IDH", color = "Año") +
  scale_color_gradient2(low="blue",midpoint = 2010,mid ="gray80" , high="red")+
  theme_bw()
CO2A

CO2E<-ggplot(europa_data%>% mutate(year=as.numeric(year)), aes(x = CO2, y = HDI, color = year)) +
  geom_point(size = 2) +
  labs(x = "CO2", y = "IDH", color = "Año") +
  scale_color_gradient2(low="blue",midpoint = 2010,mid ="gray80" , high="red")+
  theme_bw()
CO2AL<-ggplot(africa_data%>% mutate(year=as.numeric(year)), aes(x = log(CO2), y = HDI, color = year)) +
  geom_point(size = 2) +
  geom_smooth(method = "lm", se = FALSE, color = "black")+
  labs(x = "CO2", y = "IDH", color = "Año") +
  scale_color_gradient2(low="blue",midpoint = 2010,mid ="gray80" , high="red")+
  theme_bw()
CO2AL
`geom_smooth()` using formula = 'y ~ x'

LMCO2A <-lm(HDI ~ log(CO2),data =africa_data)
summary(LMCO2A)

Call:
lm(formula = HDI ~ log(CO2), data = africa_data)

Residuals:
      Min        1Q    Median        3Q       Max 
-0.131950 -0.042368  0.003164  0.042547  0.104047 

Coefficients:
            Estimate Std. Error t value Pr(>|t|)    
(Intercept) 0.584119   0.002391  244.25   <2e-16 ***
log(CO2)    0.063955   0.001416   45.16   <2e-16 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 0.05163 on 675 degrees of freedom
Multiple R-squared:  0.7513,    Adjusted R-squared:  0.7509 
F-statistic:  2039 on 1 and 675 DF,  p-value: < 2.2e-16
plot(LMCO2A)

residuos_dfCO2A <- data.frame(
  residuos_est = rstandard(LMCO2A)
)
QQPLOTCO2<- ggplot(residuos_dfCO2A, aes(sample = residuos_est)) +
  stat_qq(color = "#0072B2", size = 2, alpha = 0.7) +          # puntos
  stat_qq_line(color = "#D55E00", linewidth = 1.2, linetype = "dashed") +  # línea de referencia
  labs(title = "QQ Plot de los residuos estandarizados",
       x = "Cuantiles teóricos",
       y = "Cuantiles de los residuos") +
  theme_minimal(base_size = 14) +
  theme(
    plot.title = element_text(hjust = 0.5, face = "bold"),
    panel.grid.major = element_line(color = "gray90")
  )
residuosLMCO2A <- data.frame(
  fitted = fitted(LMCO2A),
  resid = resid(LMCO2A)
)

ResidualsvsfittedCO2A <- ggplot(residuosLMCO2A, aes(x = fitted, y = resid)) +
  geom_point(color = "#0072B2", alpha = 0.6, size = 2) +         # puntos
  geom_smooth(method = "loess", se = FALSE, color = "#D55E00", linewidth = 1.2) +  # curva de tendencia
  geom_hline(yintercept = 0, linetype = "dashed", color = "gray40") +  # línea base
  labs(title = "Residuals vs Fitted Values",
       x = "Fitted values",
       y = "Residuals") +
  theme_minimal(base_size = 14) +
  theme(
    plot.title = element_text(hjust = 0.5, face = "bold"),
    panel.grid.major = element_line(color = "gray90")
  )

MODELOS

Modelos Lineales Generalizados

Modelo de Europa

modelo_glmEuropa33 <- glmmTMB(HDI ~ CPI + GII + LFPF + log(gasto_salud) + AGUAPOTABLE + SSPF,
                           family = beta_family(),
                           data = europa_data)

r2(modelo_glmEuropa33)
# R2 for Generalized Linear Regression
  Ferrari's R2: 0.889
summary(modelo_glmEuropa33)
 Family: beta  ( logit )
Formula:          
HDI ~ CPI + GII + LFPF + log(gasto_salud) + AGUAPOTABLE + SSPF
Data: europa_data

      AIC       BIC    logLik -2*log(L)  df.resid 
  -4345.5   -4307.1    2180.8   -4361.5       894 


Dispersion parameter for beta family ():  248 

Conditional model:
                   Estimate Std. Error z value Pr(>|z|)    
(Intercept)      -0.8152344  0.1918720  -4.249 2.15e-05 ***
CPI               0.0071228  0.0006031  11.811  < 2e-16 ***
GII              -0.8190036  0.1275779  -6.420 1.37e-10 ***
LFPF              0.0098734  0.0006628  14.896  < 2e-16 ***
log(gasto_salud)  0.1913911  0.0100084  19.123  < 2e-16 ***
AGUAPOTABLE       0.0061755  0.0019704   3.134  0.00172 ** 
SSPF             -0.0021497  0.0010521  -2.043  0.04102 *  
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Analizamos los residuos del modelo

# Data frame con residuos y valores ajustados
df_residuos1 <- data.frame(
  Ajustados = fitted(modelo_glmEuropa33),
  Residuos = resid(modelo_glmEuropa33, type = "pearson")
)

# Gráfico mejorado
ggplot(df_residuos1, aes(x = Ajustados, y = Residuos)) +
  geom_point(alpha = 0.7, color = "#0072B2") +
  geom_hline(yintercept = 0, linetype = "dashed", color = "red") +
  labs(
    title="Residuos de Pearson vs. Valores Ajustados",
    x = "Valores ajustados",
    y = "Residuos de Pearson"
  ) +
  theme_minimal(base_size = 13)

ggplot(df_residuos1, aes(sample = Residuos)) +
     stat_qq(alpha = 0.7, color = "#009E73") +
     stat_qq_line(color = "red", linetype = "dotted") +
    labs(
        title = "Q-Q Plot de los Residuos de Pearson",
         x = "Cuantiles teóricos",
         y = "Cuantiles de los residuos"
     ) +
  theme_minimal(base_size = 13)

library(qqplotr)

Attaching package: 'qqplotr'
The following objects are masked from 'package:ggplot2':

    stat_qq_line, StatQqLine
ggplot(df_residuos1, aes(sample = Residuos)) +
  stat_qq_band(distribution = "norm", alpha = 0.3, fill = "#56B4E9") +  # Banda de confianza
  stat_qq_point(alpha = 0.7, color = "#009E73") +
  stat_qq_line(distribution = "norm", color = "red", linetype = "dotted") +
  labs(
    x = "Cuantiles teóricos",
    y = "Cuantiles de los residuos"
  ) +
  theme_minimal(base_size = 13)

Multicolinealidad

check_collinearity(modelo_glmEuropa33)
# Check for Multicollinearity

Low Correlation

        Term  VIF   VIF 95% CI adj. VIF Tolerance Tolerance 95% CI
         CPI 4.43 [3.96, 4.98]     2.11      0.23     [0.20, 0.25]
        LFPF 1.27 [1.19, 1.39]     1.13      0.79     [0.72, 0.84]
 AGUAPOTABLE 2.06 [1.88, 2.28]     1.44      0.48     [0.44, 0.53]
        SSPF 3.41 [3.06, 3.82]     1.85      0.29     [0.26, 0.33]

Moderate Correlation

             Term  VIF   VIF 95% CI adj. VIF Tolerance Tolerance 95% CI
              GII 5.44 [4.85, 6.13]     2.33      0.18     [0.16, 0.21]
 log(gasto_salud) 7.34 [6.52, 8.29]     2.71      0.14     [0.12, 0.15]
# Calcula R2 para tu modelo
r2_glmmTMB <- r2(modelo_glmEuropa33)

print(r2_glmmTMB)
# R2 for Generalized Linear Regression
  Ferrari's R2: 0.889
modelo_glmAfrica33 <- glmmTMB(HDI ~ log(CO2)+AccesoElectricidad+GII+log(gasto_salud)+AGUAPOTABLE+MMR+MFC,
                           family = beta_family(),
                           data = europa_data)


summary(modelo_glmAfrica33)
 Family: beta  ( logit )
Formula:          
HDI ~ log(CO2) + AccesoElectricidad + GII + log(gasto_salud) +  
    AGUAPOTABLE + MMR + MFC
Data: europa_data

      AIC       BIC    logLik -2*log(L)  df.resid 
  -4189.1   -4145.9    2103.6   -4207.1       893 


Dispersion parameter for beta family ():  209 

Conditional model:
                     Estimate Std. Error z value Pr(>|z|)    
(Intercept)        -0.9585401  0.7124858  -1.345   0.1785    
log(CO2)           -0.0516430  0.0129213  -3.997 6.42e-05 ***
AccesoElectricidad  0.0073505  0.0060999   1.205   0.2282    
GII                -2.0797430  0.1209498 -17.195  < 2e-16 ***
log(gasto_salud)    0.2587653  0.0109988  23.527  < 2e-16 ***
AGUAPOTABLE         0.0048890  0.0023554   2.076   0.0379 *  
MMR                 0.0128496  0.0009288  13.835  < 2e-16 ***
MFC                 0.0057624  0.0012185   4.729 2.26e-06 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
r2(modelo_glmAfrica33)
# R2 for Generalized Linear Regression
  Ferrari's R2: 0.872
check_collinearity(modelo_glmAfrica33)
# Check for Multicollinearity

Low Correlation

               Term  VIF   VIF 95% CI adj. VIF Tolerance Tolerance 95% CI
           log(CO2) 1.70 [1.56, 1.88]     1.30      0.59     [0.53, 0.64]
 AccesoElectricidad 1.45 [1.34, 1.59]     1.20      0.69     [0.63, 0.75]
                GII 4.11 [3.68, 4.61]     2.03      0.24     [0.22, 0.27]
        AGUAPOTABLE 2.50 [2.26, 2.78]     1.58      0.40     [0.36, 0.44]
                MMR 3.22 [2.90, 3.60]     1.80      0.31     [0.28, 0.34]
                MFC 2.73 [2.47, 3.04]     1.65      0.37     [0.33, 0.41]

Moderate Correlation

             Term  VIF   VIF 95% CI adj. VIF Tolerance Tolerance 95% CI
 log(gasto_salud) 7.56 [6.71, 8.53]     2.75      0.13     [0.12, 0.15]
df_residuos2 <- data.frame(
  Ajustados = fitted(modelo_glmAfrica33),
  Residuos = resid(modelo_glmAfrica33, type = "pearson")
)

# Gráfico mejorado
ggplot(df_residuos2, aes(x = Ajustados, y = Residuos)) +
  geom_point(alpha = 0.7, color = "#0072B2") +
  geom_hline(yintercept = 0, linetype = "dashed", color = "red") +
  labs(
    title="Residuos de Pearson vs. Valores Ajustados",
    x = "Valores ajustados",
    y = "Residuos de Pearson"
  ) +
  theme_minimal(base_size = 13)

Modelos GAM

modelogamEuropa<- gam(HDI ~ CPI + GII  + s(MFC) + s(MMR,k=13) + s(SSPF,k=13) + log(gasto_salud) + s(AGUAPOTABLE,k=13) ,
              data = europa_data,
              family = betar(link = "logit"), method = "GCV")
summary(modelogamEuropa)

Family: Beta regression(312.133) 
Link function: logit 

Formula:
HDI ~ CPI + GII + s(MFC) + s(MMR, k = 13) + s(SSPF, k = 13) + 
    log(gasto_salud) + s(AGUAPOTABLE, k = 13)

Parametric coefficients:
                   Estimate Std. Error z value Pr(>|z|)    
(Intercept)       0.7214502  0.0763870   9.445   <2e-16 ***
CPI               0.0071121  0.0006593  10.787   <2e-16 ***
GII              -2.7078161  0.1645134 -16.460   <2e-16 ***
log(gasto_salud)  0.1716203  0.0110455  15.538   <2e-16 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Approximate significance of smooth terms:
                 edf Ref.df Chi.sq p-value    
s(MFC)         6.770  7.917  22.02 0.00357 ** 
s(MMR)         8.960 10.437 321.00 < 2e-16 ***
s(SSPF)        4.192  5.261  58.57 < 2e-16 ***
s(AGUAPOTABLE) 7.990  9.527  56.83 < 2e-16 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

R-sq.(adj) =  0.924   Deviance explained = 92.8%
-REML = -2219.8  Scale est. = 1         n = 902
plot(modelogamEuropa)

gam.check(modelogamEuropa)


Method: REML   Optimizer: outer newton
full convergence after 9 iterations.
Gradient range [-2.607205e-06,1.394465e-05]
(score -2219.806 & scale 1).
Hessian positive definite, eigenvalue range [0.6896907,431.8416].
Model rank =  49 / 49 

Basis dimension (k) checking results. Low p-value (k-index<1) may
indicate that k is too low, especially if edf is close to k'.

                  k'   edf k-index p-value    
s(MFC)          9.00  6.77    0.97    0.20    
s(MMR)         12.00  8.96    0.94    0.04 *  
s(SSPF)        12.00  4.19    0.56  <2e-16 ***
s(AGUAPOTABLE) 12.00  7.99    0.82  <2e-16 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
concurvity(modelogamEuropa)
              para    s(MFC)    s(MMR)   s(SSPF) s(AGUAPOTABLE)
worst    0.9962226 0.8263533 0.7784543 0.8261311      0.7049942
observed 0.9962226 0.1706418 0.5387025 0.7809449      0.3124536
estimate 0.9962226 0.5555202 0.5185759 0.7128019      0.5000650
extractAIC(modelogamEuropa)
[1]    31.91242 -4535.81894
residuosgam <- data.frame(Residuos = residuals(modelogamEuropa, type = "deviance"))

# 3. Crear QQ-plot con ggplot2 + qqplotr
ggplot(residuosgam , aes(sample = Residuos)) +
  stat_qq_band(distribution = "norm", alpha = 0.3, fill = "#56B4E9") +  # Banda de confianza
  stat_qq_point(alpha = 0.7, color = "#009E73") +
  stat_qq_line(distribution = "norm", color = "red", linetype = "dotted") +
  labs(
    title = "QQ-plot de residuos del modelo GAM (Europa)",
    x = "Cuantiles teóricos",
    y = "Cuantiles de los residuos"
  ) +
  theme_minimal(base_size = 13)

residuosgam2 <- data.frame(
  Ajustados = fitted(modelogamEuropa),                      # Valores ajustados
  Residuos = residuals(modelogamEuropa, type = "pearson")   # Residuos de Pearson
)

# 2. Gráfico con ggplot2
ggplot(residuosgam2, aes(x = Ajustados, y = Residuos)) +
  geom_point(alpha = 0.7, color = "#0072B2") +
  geom_hline(yintercept = 0, linetype = "dashed", color = "red") +
  labs(
    
    x = "Valores ajustados",
    y = "Residuos de Pearson"
  ) +
  theme_minimal(base_size = 13)

pls_model <- plsr(HDI ~GII +CPI+log(gasto_salud), scale = TRUE, data = europa_data, validation = "CV")

# Ver cuántos componentes usar
summary(pls_model)
Data:   X dimension: 902 3 
    Y dimension: 902 1
Fit method: kernelpls
Number of components considered: 3

VALIDATION: RMSEP
Cross-validated using 10 random segments.
       (Intercept)  1 comps  2 comps  3 comps
CV         0.07288   0.0248  0.02360  0.02349
adjCV      0.07288   0.0248  0.02359  0.02349

TRAINING: % variance explained
     1 comps  2 comps  3 comps
X      87.30    92.44   100.00
HDI    88.43    89.60    89.67
scores_pls <- pls_model$scores[, 1:2]  # Usar los primeros dos componentes
#scores_pls 
#pls_model$loading.weights
europa_data$scores_1 <- scores_pls[, 1]  # Primer componente
europa_data$scores_2 <- scores_pls[, 2]   # Primer componente


gamgam_pls <- gam(HDI ~   s(SSPF, bs = "ps") + s(MMR, bs = "tp") + s(AGUAPOTABLE)+ +scores_1+ scores_2, 
                  data = europa_data, 
                  select = TRUE, 
                  family = betar(link = "logit"), 
                  method = "GCV")
gam.check(gamgam_pls)


Method: REML   Optimizer: outer newton
full convergence after 21 iterations.
Gradient range [-0.0002958453,0.0003322675]
(score -2209.515 & scale 1).
Hessian positive definite, eigenvalue range [0.1172235,438.8201].
Model rank =  30 / 30 

Basis dimension (k) checking results. Low p-value (k-index<1) may
indicate that k is too low, especially if edf is close to k'.

                 k'  edf k-index p-value    
s(SSPF)        9.00 3.46    0.55  <2e-16 ***
s(MMR)         9.00 7.63    0.95   0.065 .  
s(AGUAPOTABLE) 9.00 6.56    0.82  <2e-16 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
summary(gamgam_pls)

Family: Beta regression(291.291) 
Link function: logit 

Formula:
HDI ~ s(SSPF, bs = "ps") + s(MMR, bs = "tp") + s(AGUAPOTABLE) + 
    +scores_1 + scores_2

Parametric coefficients:
             Estimate Std. Error z value Pr(>|z|)    
(Intercept)  1.791050   0.005825 307.470   <2e-16 ***
scores_1     0.380092   0.008700  43.687   <2e-16 ***
scores_2    -0.001725   0.018262  -0.094    0.925    
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Approximate significance of smooth terms:
                 edf Ref.df Chi.sq p-value    
s(SSPF)        3.464      9  43.04  <2e-16 ***
s(MMR)         7.629      9 293.61  <2e-16 ***
s(AGUAPOTABLE) 6.562      9  43.45  <2e-16 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

R-sq.(adj) =  0.919   Deviance explained = 92.2%
-REML = -2209.5  Scale est. = 1         n = 902
concurvity(gamgam_pls)
               para   s(SSPF)    s(MMR) s(AGUAPOTABLE)
worst    9.9027e-23 0.7658453 0.6379702      0.6519328
observed 9.9027e-23 0.6854021 0.3459880      0.2054271
estimate 9.9027e-23 0.3085410 0.4290763      0.3832603
gamgam_pls2 <- gam(HDI ~ s(LFPF) + s(MMR) +scores_1,  
                  data = europa_data, 
                  select = TRUE, 
                  family = betar(link = "logit"), 
                  method = "REML")
summary(gamgam_pls2)

Family: Beta regression(272.679) 
Link function: logit 

Formula:
HDI ~ s(LFPF) + s(MMR) + scores_1

Parametric coefficients:
            Estimate Std. Error z value Pr(>|z|)    
(Intercept) 1.791317   0.006019  297.63   <2e-16 ***
scores_1    0.344998   0.006122   56.35   <2e-16 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Approximate significance of smooth terms:
          edf Ref.df Chi.sq p-value    
s(LFPF) 2.743      9  48.47  <2e-16 ***
s(MMR)  7.265      9 248.99  <2e-16 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

R-sq.(adj) =  0.911   Deviance explained = 91.6%
-REML = -2195.5  Scale est. = 1         n = 902
concurvity(gamgam_pls2)
                 para   s(LFPF)    s(MMR)
worst    4.651427e-24 0.3309066 0.6880789
observed 4.651427e-24 0.2714976 0.2411544
estimate 4.651427e-24 0.1860038 0.3936039
gam.check(gamgam_pls2)


Method: REML   Optimizer: outer newton
full convergence after 7 iterations.
Gradient range [-6.124927e-05,0.0005363199]
(score -2195.54 & scale 1).
Hessian positive definite, eigenvalue range [3.540861e-06,444.0262].
Model rank =  20 / 20 

Basis dimension (k) checking results. Low p-value (k-index<1) may
indicate that k is too low, especially if edf is close to k'.

          k'  edf k-index p-value   
s(LFPF) 9.00 2.74    0.91   0.005 **
s(MMR)  9.00 7.26    0.93   0.040 * 
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
plot(gamgam_pls2)

library(fitdistrplus)
Loading required package: MASS

Attaching package: 'MASS'
The following object is masked from 'package:dplyr':

    select
Loading required package: survival
# Extraer la variable
idh <- europa_data$HDI

# Escalar a (0, 1) si no lo está (solo si necesario)
idh_scaled <- (idh - min(idh)) / (max(idh) - min(idh))

# Ajustar distribución beta
fit <- fitdist(idh_scaled, "beta")

# Curva de densidad beta ajustada
x_vals <- seq(0, 1, length.out = 500)
beta_curve <- dbeta(x_vals,
                    shape1 = fit$estimate["shape1"],
                    shape2 = fit$estimate["shape2"])
df_curve <- data.frame(x = x_vals, y = beta_curve)

# Graficar
ggplot(data.frame(idh_scaled), aes(x = idh_scaled)) +
  geom_histogram(aes(y = ..density..), bins = 30,
                 fill = "skyblue", color = "black", alpha = 0.7) +
 geom_line(data = df_curve, aes(x = x, y = y), color = "red", linewidth = 1.2) +
  labs(title = "Ajuste de una distribución beta al IDH",
       x = "IDH (escalado)", y = "Densidad")
Warning: The dot-dot notation (`..density..`) was deprecated in ggplot2 3.4.0.
ℹ Please use `after_stat(density)` instead.

fit$estimate
  shape1   shape2 
2.518679 1.466924 
library(fitdistrplus)
library(goftest)


fit_beta <- fitdist(europa_data$HDI, "beta", method = "mle")

# Ver resumen del ajuste
summary(fit_beta)
Fitting of the distribution ' beta ' by maximum likelihood 
Parameters : 
        estimate Std. Error
shape1 20.384026  0.9729091
shape2  3.753954  0.1696027
Loglikelihood:  1140.685   AIC:  -2277.37   BIC:  -2267.76 
Correlation matrix:
          shape1    shape2
shape1 1.0000000 0.9237282
shape2 0.9237282 1.0000000
# Graficar el ajuste (opcional)
plot(fit_beta)

# Usar test de Anderson-Darling (más adecuado con ties)
ad_test <- ad.test(europa_data$HDI, null = function(x) pbeta(x,
    shape1 = fit_beta$estimate["shape1"],
    shape2 = fit_beta$estimate["shape2"]))

print(ad_test)

    Anderson-Darling test of goodness-of-fit
    Null hypothesis: distribution 'function(x) pbeta(x, shape1 =
    fit_beta$estimate["shape1"], shape2 = fit_beta$estimate["shape2"])'
    Parameters assumed to be fixed

data:  europa_data$HDI
An = 4.001, p-value = 0.008714